Import Libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
In [2]:
# Read the world-population CSV (path is relative to the working directory)
DATASET_PATH = "world_population.csv"
data = pd.read_csv(DATASET_PATH)
In [3]:
# Display the loaded dataset; the rendered output shows only the first and last rows
data
Out[3]:
Rank CCA3 Country/Territory Capital Continent 2022 Population 2020 Population 2015 Population 2010 Population 2000 Population 1990 Population 1980 Population 1970 Population Area (km²) Density (per km²) Growth Rate World Population Percentage
0 36 AFG Afghanistan Kabul Asia 41128771 38972230 33753499 28189672 19542982 10694796 12486631 10752971 652230 63.0587 1.0257 0.52
1 138 ALB Albania Tirana Europe 2842321 2866849 2882481 2913399 3182021 3295066 2941651 2324731 28748 98.8702 0.9957 0.04
2 34 DZA Algeria Algiers Africa 44903225 43451666 39543154 35856344 30774621 25518074 18739378 13795915 2381741 18.8531 1.0164 0.56
3 213 ASM American Samoa Pago Pago Oceania 44273 46189 51368 54849 58230 47818 32886 27075 199 222.4774 0.9831 0.00
4 203 AND Andorra Andorra la Vella Europe 79824 77700 71746 71519 66097 53569 35611 19860 468 170.5641 1.0100 0.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
229 226 WLF Wallis and Futuna Mata-Utu Oceania 11572 11655 12182 13142 14723 13454 11315 9377 142 81.4930 0.9953 0.00
230 172 ESH Western Sahara El Aaiún Africa 575986 556048 491824 413296 270375 178529 116775 76371 266000 2.1654 1.0184 0.01
231 46 YEM Yemen Sanaa Asia 33696614 32284046 28516545 24743946 18628700 13375121 9204938 6843607 527968 63.8232 1.0217 0.42
232 63 ZMB Zambia Lusaka Africa 20017675 18927715 16248230 13792086 9891136 7686401 5720438 4281671 752612 26.5976 1.0280 0.25
233 74 ZWE Zimbabwe Harare Africa 16320537 15669666 14154937 12839771 11834676 10113893 7049926 5202918 390757 41.7665 1.0204 0.20

234 rows × 17 columns

In [4]:
# Data types and missing values: info() lists each column's dtype and
# non-null count, along with the index range and memory usage
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Rank                         234 non-null    int64  
 1   CCA3                         234 non-null    object 
 2   Country/Territory            234 non-null    object 
 3   Capital                      234 non-null    object 
 4   Continent                    234 non-null    object 
 5   2022 Population              234 non-null    int64  
 6   2020 Population              234 non-null    int64  
 7   2015 Population              234 non-null    int64  
 8   2010 Population              234 non-null    int64  
 9   2000 Population              234 non-null    int64  
 10  1990 Population              234 non-null    int64  
 11  1980 Population              234 non-null    int64  
 12  1970 Population              234 non-null    int64  
 13  Area (km²)                   234 non-null    int64  
 14  Density (per km²)            234 non-null    float64
 15  Growth Rate                  234 non-null    float64
 16  World Population Percentage  234 non-null    float64
dtypes: float64(3), int64(10), object(4)
memory usage: 31.2+ KB
In [5]:
# Preview the first five rows
data.head(n=5)
Out[5]:
Rank CCA3 Country/Territory Capital Continent 2022 Population 2020 Population 2015 Population 2010 Population 2000 Population 1990 Population 1980 Population 1970 Population Area (km²) Density (per km²) Growth Rate World Population Percentage
0 36 AFG Afghanistan Kabul Asia 41128771 38972230 33753499 28189672 19542982 10694796 12486631 10752971 652230 63.0587 1.0257 0.52
1 138 ALB Albania Tirana Europe 2842321 2866849 2882481 2913399 3182021 3295066 2941651 2324731 28748 98.8702 0.9957 0.04
2 34 DZA Algeria Algiers Africa 44903225 43451666 39543154 35856344 30774621 25518074 18739378 13795915 2381741 18.8531 1.0164 0.56
3 213 ASM American Samoa Pago Pago Oceania 44273 46189 51368 54849 58230 47818 32886 27075 199 222.4774 0.9831 0.00
4 203 AND Andorra Andorra la Vella Europe 79824 77700 71746 71519 66097 53569 35611 19860 468 170.5641 1.0100 0.00
In [6]:
# Preview the last five rows
data.tail(n=5)
Out[6]:
Rank CCA3 Country/Territory Capital Continent 2022 Population 2020 Population 2015 Population 2010 Population 2000 Population 1990 Population 1980 Population 1970 Population Area (km²) Density (per km²) Growth Rate World Population Percentage
229 226 WLF Wallis and Futuna Mata-Utu Oceania 11572 11655 12182 13142 14723 13454 11315 9377 142 81.4930 0.9953 0.00
230 172 ESH Western Sahara El Aaiún Africa 575986 556048 491824 413296 270375 178529 116775 76371 266000 2.1654 1.0184 0.01
231 46 YEM Yemen Sanaa Asia 33696614 32284046 28516545 24743946 18628700 13375121 9204938 6843607 527968 63.8232 1.0217 0.42
232 63 ZMB Zambia Lusaka Africa 20017675 18927715 16248230 13792086 9891136 7686401 5720438 4281671 752612 26.5976 1.0280 0.25
233 74 ZWE Zimbabwe Harare Africa 16320537 15669666 14154937 12839771 11834676 10113893 7049926 5202918 390757 41.7665 1.0204 0.20
In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()
Out[7]:
Rank 2022 Population 2020 Population 2015 Population 2010 Population 2000 Population 1990 Population 1980 Population 1970 Population Area (km²) Density (per km²) Growth Rate World Population Percentage
count 234.000000 2.340000e+02 2.340000e+02 2.340000e+02 2.340000e+02 2.340000e+02 2.340000e+02 2.340000e+02 2.340000e+02 2.340000e+02 234.000000 234.000000 234.000000
mean 117.500000 3.407441e+07 3.350107e+07 3.172996e+07 2.984524e+07 2.626947e+07 2.271022e+07 1.898462e+07 1.578691e+07 5.814494e+05 452.127044 1.009577 0.427051
std 67.694165 1.367664e+08 1.355899e+08 1.304050e+08 1.242185e+08 1.116982e+08 9.783217e+07 8.178519e+07 6.779509e+07 1.761841e+06 2066.121904 0.013385 1.714977
min 1.000000 5.100000e+02 5.200000e+02 5.640000e+02 5.960000e+02 6.510000e+02 7.000000e+02 7.330000e+02 7.520000e+02 1.000000e+00 0.026100 0.912000 0.000000
25% 59.250000 4.197385e+05 4.152845e+05 4.046760e+05 3.931490e+05 3.272420e+05 2.641158e+05 2.296142e+05 1.559970e+05 2.650000e+03 38.417875 1.001775 0.010000
50% 117.500000 5.559944e+06 5.493074e+06 5.307400e+06 4.942770e+06 4.292907e+06 3.825410e+06 3.141146e+06 2.604830e+06 8.119950e+04 95.346750 1.007900 0.070000
75% 175.750000 2.247650e+07 2.144798e+07 1.973085e+07 1.915957e+07 1.576230e+07 1.186923e+07 9.826054e+06 8.817329e+06 4.304258e+05 238.933250 1.016950 0.280000
max 234.000000 1.425887e+09 1.424930e+09 1.393715e+09 1.348191e+09 1.264099e+09 1.153704e+09 9.823725e+08 8.225344e+08 1.709824e+07 23172.266700 1.069100 17.880000
In [8]:
# Shape of the data as a (rows, columns) tuple
data.shape
Out[8]:
(234, 17)
In [9]:
# Number of missing values in each column
data.isna().sum()
Out[9]:
Rank                           0
CCA3                           0
Country/Territory              0
Capital                        0
Continent                      0
2022 Population                0
2020 Population                0
2015 Population                0
2010 Population                0
2000 Population                0
1990 Population                0
1980 Population                0
1970 Population                0
Area (km²)                     0
Density (per km²)              0
Growth Rate                    0
World Population Percentage    0
dtype: int64
In [10]:
# Tally how many countries/territories belong to each continent and
# name the two resulting columns explicitly
continent_counts = (
    data['Continent']
    .value_counts()
    .rename_axis('Continent')
    .reset_index(name='Count')
)

# Bar chart of the per-continent tallies
axis_labels = {'Continent': 'Continent', 'Count': 'Count'}
fig = px.bar(
    continent_counts,
    x='Continent',
    y='Count',
    labels=axis_labels,
    title='Distribution of Countries by Continent',
)

# Tilt the x-axis tick labels by 45 degrees so the continent names stay readable
fig.update_layout(xaxis_tickangle=45)

# Render the figure
fig.show()
In [12]:
# 2022 population of every country/territory
population_2022 = data['2022 Population']

# The Series is passed on its own (no explicit x), so the label map keys on
# Plotly's generic 'value' name to title the x-axis
histogram_labels = {'value': '2022 Population', 'count': 'Count'}
fig = px.histogram(
    population_2022,
    nbins=20,
    labels=histogram_labels,
    title='Population Distribution in 2022',
)

# Render the figure
fig.show()

Note: This dataset does not include information about ages or genders, which is what would normally be used to show the age or gender distribution of a population. A different dataset containing age or gender data would be needed to build meaningful visualizations of those distributions.

In [13]:
# NOTE: the values generated below are random placeholders used only to
# demonstrate the plots. One value is drawn per row of `data`; they carry no
# real demographic meaning.

# Fix the seed so the synthetic values are identical on every run
np.random.seed(0)

# One random integer age per row; randint's upper bound is exclusive, so ages span 1-99
age_data = np.random.randint(1, 100, len(data))

# One random gender label per row, both labels equally likely
gender_data = np.random.choice(['Male', 'Female'], size=len(data), p=[0.5, 0.5])

# Add the synthetic columns to the population DataFrame
# (re-running this cell overwrites them with the same seeded values)
data['Synthetic Age'] = age_data
data['Synthetic Gender'] = gender_data

# Histogram of the synthetic ages
fig_age = px.histogram(data, x='Synthetic Age', nbins=20,
                       labels={'Synthetic Age': 'Age', 'count': 'Count'},
                       title='Synthetic Age Distribution')

# Show the age distribution plot
fig_age.show()

# Tally the gender labels and name both columns explicitly. The column names
# produced by a bare value_counts().reset_index() differ between pandas
# versions ('index' + series name before 2.0, series name + 'count' from 2.0),
# so relying on them makes the plot fail on newer pandas.
gender_counts = (
    data['Synthetic Gender']
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)

# Bar chart of the synthetic gender tallies
fig_gender = px.bar(gender_counts, x='Gender', y='Count',
                    title='Synthetic Gender Distribution')

# Show the gender distribution plot
fig_gender.show()
In [ ]: